rm(list = ls())
pacman::p_load(vdemdata, fixest, modelsummary, kableExtra, nprobust, 
               dplyr, tidyr, ggplot2, wesanderson, haven, forcats, 
               viridis, stringr, data.table)

dir = dirname(rstudioapi::getSourceEditorContext()$path)
setwd(dir)

options("modelsummary_factory_default" = "kableExtra")

set.seed(11215)

### Scope conditions (Figure A1) ###############################################
################################################################################

vd <- vdemdata::vdem 
vd <- vd %>% filter(year %in% 2010:2020)
vars <- c("v2xnp_client","v2xps_party","v2xel_frefair")

sum <- vd %>% filter(e_regionpol_6C==4) %>% group_by(country_name) %>% 
  summarise(across(all_of(vars), function(x) mean(na.omit(x)))) %>% 
  mutate(country = case_when(country_name=="Democratic Republic of the Congo"~"D. R. Congo",T~country_name))
sum$country <- factor(sum$country, levels=sum$country[order(sum$v2xnp_client)])

sum <- sum %>% pivot_longer(cols=starts_with("v2")) %>% 
  mutate(lab = case_when(grepl("clien",name)~"A. Clientelism",grepl("v2xps_party",name)~"B. Party institutionalization",grepl("frefair",name)~"C. Free and fair elections"),col = as.numeric(country_name=="Liberia"))
sum$lab <- factor(sum$lab, levels=c("A. Clientelism","B. Party institutionalization","C. Free and fair elections"))

p1 <- sum %>% ggplot(aes(x=value, y=country))+theme_bw()+
  geom_point(aes(color=factor(col)))+ylab(NULL)+xlab("Index value")+facet_grid(cols=vars(lab))+theme(strip.background =element_rect(fill="white"))+
  scale_color_manual(values=wes_palette(n=2, name="Royal1"))+theme(legend.position = "none")+scale_x_continuous(breaks=c(0,0.5,1),limits=c(0,1))

sum <- vd %>% group_by(e_regionpol_6C) %>% 
  summarise(across(all_of(vars), function(x) mean(na.omit(x)))) %>% 
  mutate(region=case_when(e_regionpol_6C==1~"E Europe/Central Asia",e_regionpol_6C==2~"Latin America/Caribbean",e_regionpol_6C==3~"Middle East/N Africa",e_regionpol_6C==4~"Sub-Saharan Africa",e_regionpol_6C==5~"W Europe/N America",e_regionpol_6C==6~"Asia/Pacific"))
sum$region <- factor(sum$region, levels=sum$region[order(sum$v2xnp_client)])

sum <- sum %>% pivot_longer(cols=starts_with("v2")) %>% 
  mutate(lab = case_when(grepl("clien",name)~"A. Clientelism",grepl("v2xps_party",name)~"B. Party institutionalization",grepl("frefair",name)~"C. Free and fair elections"))
sum$lab <- factor(sum$lab, levels=c("A. Clientelism","B. Party institutionalization","C. Free and fair elections"))

p2 <- sum %>% ggplot(aes(x=value, y=region))+theme_bw()+
  geom_point()+ylab(NULL)+xlab(NULL)+facet_grid(cols=vars(lab))+theme(strip.background =element_rect(fill="white"))+
  scale_color_manual(values=wes_palette(n=2, name="Royal1"))+theme(legend.position = NULL)+scale_x_continuous(breaks=c(0,0.5,1),limits=c(0,1))

p <- egg::ggarrange(p2, p1, heights=c(1,3), padding=0, nrow=2)
ggsave("Figures/Figure A1.pdf",p,width=8,height=9.5)


### Treatment effects of rebroadcasting intervention (Figure A2) ###############
################################################################################

d <- read_dta('Data/data_voter.dta')
d2 <- read_dta('Data/data_voter_candidate.dta')

ctrls <- d %>% select(starts_with(c("ED_","r_ed_"))) %>% colnames() %>% paste(collapse="+")

gen.reg <- function(df, spec, dv, lab){
  
  dat <- df
  
  if(spec=="levels"){ fmla <- as.formula(paste(dv, "~ t_broadcast + t_invite + r_survey_day +", ctrls," + r_age + factor(r_male) | t_block + OperatorId")) }
  if(spec=="panel"){ fmla <- as.formula(paste(dv, "~ t_broadcast + t_invite + r_survey_day +", ctrls," + r_age + factor(r_male) | t_block + OperatorId")) }
  
  if(spec=="het_policy"){ fmla <- as.formula(paste(dv, "~ t_broadcast*scale(pref_match) + t_invite*scale(pref_match) + r_survey_day +", ctrls," + r_age + factor(r_male) | t_block + OperatorId")) }
  if(spec=="het_performance"){ fmla <- as.formula(paste(dv, "~ t_broadcast*scale(perf_pred) + t_invite*scale(perf_pred) + r_survey_day +", ctrls," + r_age + factor(r_male) | t_block + OperatorId")) }
  
  reg1 <- feols(fmla, data=dat, cluster="EDCODE")
  reg2 <- feols(fmla, data=dat, cluster="EDCODE", weights=1/dat$obs)
  reg3 <- feols(fmla, data=dat, cluster="EDCODE", weights=dat$reg/dat$obs)
  
  tab <- reg1$coeftable %>% as.data.frame() %>% filter(grepl("t_broadcast",rownames(.))==T & grepl("rb_",rownames(.))==F) %>% mutate(dv, wgt="No") %>% 
    bind_rows(reg2$coeftable %>% as.data.frame() %>% filter(grepl("t_broadcast",rownames(.))==T & grepl("rb_",rownames(.))==F) %>% mutate(dv, wgt="1/Obs")) %>% 
    bind_rows(reg3$coeftable %>% as.data.frame() %>% filter(grepl("t_broadcast",rownames(.))==T & grepl("rb_",rownames(.))==F) %>% mutate(dv, wgt="Reg/Obs")) %>% 
    mutate(lab=lab)
  colnames(tab)[1:4] <- c("coef","se","t","p")
  
  return(tab)
}

make.p <- function(df, gg_title, facet){
  
  df$wgt <- factor(df$wgt, levels=c("No","1/Obs","Reg/Obs"))
  df$wgt <- fct_rev(df$wgt)
  df$lab <- fct_rev(df$lab)
  
  if(facet==0){add_me <- NULL}
  if(facet==1){add_me <- facet_grid(cols=vars(type))}
  if(facet==2){add_me <- facet_grid(cols=vars(type), rows=vars(dv))}
  
  p <- ggplot(data=df, aes(group=wgt, color=wgt, y=lab))+theme_bw()+
    geom_vline(aes(xintercept=0),color="grey80")+scale_color_viridis(discrete=T, guide = guide_legend(reverse = TRUE))+ylab(NULL)+xlab(NULL)+
    geom_point(aes(x=coef), position=position_dodge2(width=0.5))+
    geom_errorbarh(aes(xmin=coef-(1.96*se), xmax=coef+(1.96*se)), position=position_dodge(0.5), height=0)+
    geom_errorbarh(aes(xmin=coef-(1.645*se), xmax=coef+(1.645*se)), position=position_dodge(0.5), height=0.25)+
    theme(legend.title = element_blank(), legend.position = NULL)+
    theme(strip.background =element_rect(fill="white"))+
    labs(subtitle = gg_title)+add_me
  p
}

# Table 6: Effects on voting outcomes #
out_1 <- gen.reg(d2 %>% filter(incumbent==1), "levels","v_cand","Rebroadcast") %>% mutate(type="Incumbent")
out_2 <- gen.reg(d2 %>% filter(incumbent==1), "het_policy","v_cand","Rebroadcast") %>% mutate(type="Incumbent") %>% 
  filter(grepl("scale",rownames(.))) %>% mutate(lab="x Std. Policy alignment")
out_3 <- gen.reg(d2 %>% filter(incumbent==1), "het_performance","v_cand","Rebroadcast") %>% mutate(type="Incumbent") %>%
  filter(grepl("scale",rownames(.))) %>% mutate(lab="x Std. Performance")
out_4 <- gen.reg(d2 %>% filter(top_chal==1), "levels","v_cand","Rebroadcast") %>% mutate(type="Challengers")
out_5 <- gen.reg(d2 %>% filter(top_chal==1), "het_policy","v_cand","Rebroadcast") %>% mutate(type="Challengers") %>% 
  filter(grepl("scale",rownames(.))) %>% mutate(lab="x Std. Policy alignment")
out_6 <- gen.reg(d2 %>% filter(top_chal==1), "het_performance","v_cand","Rebroadcast") %>% mutate(type="Challengers") %>% 
  filter(grepl("scale",rownames(.))) %>% mutate(lab="x Std. Performance")
out_tab6 <- bind_rows(out_1, out_2, out_3, out_4, out_5, out_6) %>% mutate(type=factor(type, levels=c("Incumbent","Challengers")))

# Table 8: Effects on debate exposure and information acquisition #
out_1 <- gen.reg(d, "panel","std_d_exposure","A. Debate listening")
out_2 <- gen.reg(d, "levels","std_d_knowledge","B. Debate knowledge")
out_3 <- gen.reg(d, "panel","std_p_knowledge","C. Policy knowledge")
out_4 <- gen.reg(d, "panel","std_i_demand","D. Political information\nacquisition")
out_tab8 <- bind_rows(out_1, out_2, out_3, out_4)

# Table 9: Effects on updating about candidates
out_1 <- gen.reg(d2, "panel","i_std_cand_comp_sure_d","Competence certainty (Incumbent)")
out_2 <- gen.reg(d2, "panel","i_std_cand_issue_sure_d","Policy certainty (Incumbent)")
out_3 <- gen.reg(d2, "panel","c_std_cand_comp_sure_d","Competence certainty (Challengers)")
out_4 <- gen.reg(d2, "panel","c_std_cand_issue_sure_d","Policy certainty (Challengers)")
out_5 <- gen.reg(d2, "panel","i_std_cand_comp_d","Competence beliefs (Incumbent)")
out_6 <- gen.reg(d2, "panel","i_std_cand_issue_d","Policy learning (Incumbent)")
out_7 <- gen.reg(d2, "panel","c_std_cand_comp_d","Competence beliefs (Challengers)")
out_8 <- gen.reg(d2, "panel","c_std_cand_issue_d","Policy learning (Challengers)")
out_tab9 <- bind_rows(out_1, out_2, out_3, out_4,
                      out_5, out_6, out_7, out_8)
out_tab9 <- out_tab9 %>% 
  mutate(type=case_when(grepl("Incumbent",lab)~"Incumbent",
                        grepl("Challengers",lab)~"Challengers")) %>% 
  mutate(type=factor(type, levels=c("Incumbent","Challengers"))) %>% 
  mutate(lab=gsub(" [(]Incumbent[)]| [(]Challengers[)]","",lab))

# Table 10: Effects on campaigning
out_1 <- gen.reg(d2, "levels","i_std_cand_ground","Ground (Incumbent)")
out_2 <- gen.reg(d2, "levels","i_std_cand_radio","Radio (Incumbent)")
out_3 <- gen.reg(d2, "levels","c_std_cand_ground","Ground (Challengers)")
out_4 <- gen.reg(d2, "levels","c_std_cand_radio","Radio (Challengers)")
out_tab10 <- bind_rows(out_1, out_2, out_3, out_4)
out_tab10 <- out_tab10 %>% 
  mutate(type=case_when(grepl("Incumbent",lab)~"Incumbent",
                        grepl("Challengers",lab)~"Challengers")) %>% 
  mutate(type=factor(type, levels=c("Incumbent","Challengers"))) %>% 
  mutate(lab=gsub(" [(]Incumbent[)]| [(]Challengers[)]","",lab))

p6 <- make.p(out_tab6,"Table 6. Effects on voting outcomes",facet=1)
p8 <- make.p(out_tab8,"Table 8. Effects on debate exposure and information acquisition",facet=0)
p9 <- make.p(out_tab9,"Table 9. Effects on updating about candidates",facet=1)
p10 <- make.p(out_tab10,"Table 10. Effects on campaigning",facet=1)

p <- egg::ggarrange(p6, p8, p9, p10, ncol=1) 
ggsave("Figures/Figure A2.pdf", p, width=8, height=10)


### Compliance analysis (Table 7 and Figure 2) #################################
################################################################################

### Complier characteristics (Table 7) ###

d <- read_dta("Data/data_voter_candidate.dta")
c <- read_dta("Data/data_candidates.dta")

vars <- c("attend","t_invite","pref_match","cand_issue_sure_bl")

d <- d %>% group_by(EDCODE, ID_cand, incumbent, top_chal, t_block) %>% 
  dplyr::summarise(across(all_of(vars), function(x) mean(x))) %>% 
  left_join(c %>% select(ID_cand, pref_match_cand), by="ID_cand") %>% 
  mutate(in_candidate_survey = !is.na(pref_match_cand))

d <- d %>% 
  dplyr::rename(policy_alignment = pref_match, policy_alignment_cand=pref_match_cand, policy_certainty=cand_issue_sure_bl)

d$policy_alignment <- scale(d$policy_alignment)[,1]
d$policy_certainty <- scale(d$policy_certainty)[,1]
d$policy_alignment_cand <- scale(d$policy_alignment_cand)[,1]

gen.kappa <- function(dataset, var){
  
  df <- dataset
  var.col <- df[,colnames(df)==var]
  
  df <- df[!is.na(var.col),]
  var.col <- var.col[!is.na(var.col)]
  
  fmla <- paste('t_invite~ ', var, ' +', 'I(', var, '^2)', '+', 'I(', var, '^3) + factor(t_block)', sep='')
  probit <- glm(fmla, family = binomial(link = "probit"), data = df)
  df$p <- probit$fitted.values
  
  df$kappa <- 1 - ((df$attend * (1-df$t_invite))/(1-df$p)) - (((1-df$attend) * df$t_invite)/df$p)
  df$kappa_always <- ((df$attend * (1-df$t_invite))/(1-df$p))
  df$kappa_never <- (((1-df$attend) * df$t_invite)/df$p)
  
  m_complier <- mean(df$kappa*var.col)/mean(df$kappa)
  m_always <- mean(df$kappa_always*var.col)/mean(df$kappa_always)
  m_never <- mean(df$kappa_never*var.col)/mean(df$kappa_never)
  
  out <- data.frame(var, mean=mean(var.col), complier=m_complier, always=m_always, never=m_never)
  return(out)
}

gen.kappa.both <- function(var){
  inc <- gen.kappa(d %>% filter(incumbent==1), var) %>% mutate(type="Incumbent") %>% relocate(type) 
  chal <- gen.kappa(d %>% filter(top_chal==1), var) %>% mutate(type="Challengers") %>% relocate(type) 
  return(inc %>% bind_rows(chal))
}

gen.comp <- function(var){
  if(grepl("_cand",var)==T){ dat <- d %>% filter(in_candidate_survey==1) %>% group_by(t_block, t_invite, attend, incumbent) %>% slice_sample(prop=1, replace=T) }
  if(grepl("_cand",var)==F){ dat <- d %>% group_by(t_block, t_invite, attend, incumbent) %>% slice_sample(prop=1, replace=T) }
  t1 <- gen.kappa(dat %>% filter(incumbent==1), var) %>% mutate(type="Incumbent")
  t2 <- gen.kappa(dat %>% filter(top_chal==1), var) %>% mutate(type="Challengers")
  diff_m <- t1$mean - t2$mean
  diff_always <- t1$always - t2$always
  diff_never <- t1$never - t2$never
  diff_complier <- t1$complier - t2$complier
  diff_inc_c_at <- t1$complier - t1$always
  diff_inc_c_nt <- t1$complier - t1$never
  diff_chal_c_at <- t2$complier - t2$always
  diff_chal_c_nt <- t2$complier - t2$never
  tab <- data.frame(var, diff_m, diff_complier, diff_always, diff_never, c_t=t1$complier, c_c=t2$complier, diff_inc_c_at, diff_inc_c_nt, diff_chal_c_at, diff_chal_c_nt)
  return(tab)
}

gen.bs.stats <- function(var){
  bs <- bind_rows(lapply(1:5000, function(x) gen.comp(var)))
  m <- bs %>% dplyr::summarise(across(starts_with("diff_"),mean)) %>% mutate(type="Difference") %>% 
    dplyr::rename(mean=diff_m, always=diff_always, never=diff_never, complier=diff_complier)
  sd <- bs %>% dplyr::summarise(across(starts_with("diff_"),function(x) sd(x))) %>% mutate(type="SE") %>% 
    dplyr::rename(mean=diff_m, always=diff_always, never=diff_never, complier=diff_complier)
  t <- abs(m[c(1:8)] / sd[c(1:8)])
  p <- (1-pnorm(as.numeric(t)))*2
  p <- as.data.frame(t(p))
  colnames(p) <- colnames(t)
  p$type <- "p"
  
  top <- gen.kappa.both(var)
  tab <- bind_rows(top, 
                   p[1:4]) %>% select(-var)
  colnames(tab) <- c(" ","Mean","C","AT","NT")
  tab <- as.data.frame(tab)
  tab[,1] <- c("Incumbent","Challengers","$p$(Incumbent=Challengers)")
  
  tab2 <- data.frame(c(p$diff_inc_c_at,p$diff_chal_c_at,NA), c(p$diff_inc_c_nt, p$diff_chal_c_nt,NA))
  colnames(tab2) <- c("p(C=AT)","p(C=NT)")
  
  tab <- tab %>% bind_cols(tab2)
  return(tab)
}

out <- gen.bs.stats("policy_alignment") %>% 
  bind_rows(gen.bs.stats("policy_alignment_cand")) %>% 
  bind_rows(gen.bs.stats("policy_certainty"))

out <- out %>% dplyr::mutate(across(Mean:`p(C=NT)`, function(x) str_trim(as.character(format(round(x,digits=2),nsmall=2)))))
out$`p(C=AT)` <- paste( "[", out$`p(C=AT)` , "]", sep="" )
out$`p(C=NT)` <- paste( "[", out$`p(C=NT)` , "]", sep="" )
out$`p(C=AT)`[c(3,6,9)] <- " "
out$`p(C=NT)`[c(3,6,9)] <- " "
out[3,2:5] <-  paste( "[", out[3,2:5] , "]", sep="")
out[6,2:5] <-  paste( "[", out[6,2:5] , "]", sep="")
out[9,2:5] <-  paste( "[", out[9,2:5] , "]", sep="")

gen.col.index <- function(dim){
  x <- 1:dim
  out <- c("\\textbf{A. Policy alignment (voter survey measure)}", paste(" (",x,") ",sep=""))
  return(out)
}

head <- c(" ","Mean","C","AT","NT","$p$(C=AT)","$p$(C=NT)")

rows <- as_tibble(t(c("\\midrule \\textbf{B. Policy alignment (candidate survey measure)}",rep(" ",6)))) %>% 
  bind_rows(as_tibble(t(c("\\midrule \\textbf{C. Policy certainty}",rep(" ",6)))))
attr(rows, 'position') <- c(4,8)

modelsummary::datasummary_df(col.names=gen.col.index(6), out,fmt=2, title="Characterizing compliers\\label{table_7}", format="latex", align="lrrrrrr",
                             escape=F, position="!htpb", add_rows=rows) %>% 
  add_header_above(header = head, escape=F) %>% 
  add_footnote("\\SpecCompliers", threeparttable = T, escape=F, notation="none") %>% kable_styling(position = "center", font_size = 10) %>% 
  kableExtra::save_kable("Tables/Table 7.tex", escape=F, format="latex")
  

### Distribution of compliance strata (Figure 2) ###

gen.npr <- function(var, incumbent){
  
  df <- d
  var.col <- df[,colnames(df)==var]
  df <- df[!is.na(var.col),]
  
  if(incumbent==1){df <- df[df$incumbent==1,]}
  if(incumbent==0){df <- df[df$top_chal==1,]}
  
  x <- df[,colnames(df)==var]*df$t_invite
  x_2 <- df[,colnames(df)==var]  
  
  y <- df$attend[x!=0]
  x <- x[x!=0]
  x <- scale(x)
  x_2 <- scale(x_2)
  
  est_1 <- lprobust(y, x, eval=seq(-1,1, (2/30)))
  est_1a <- lprobust(df$attend[df$t_invite==0], x_2[df$t_invite==0], eval=seq(-1,1, (2/30)))
  
  out_1 <- bind_cols(x=est_1$Estimate[,1],y=est_1$Estimate[,5]-est_1a$Estimate[,5])
  out_1$type <- 'Complier'
  
  out_2 <- bind_cols(x=est_1a$Estimate[,1],y=est_1a$Estimate[,5],
                     y_ub=est_1a$Estimate[,5]+(1.96*est_1a$Estimate[,7]),
                     y_lb=est_1a$Estimate[,5]-(1.96*est_1a$Estimate[,7]))
  out_2$type <- 'Always-taker'
  
  out <- bind_rows(out_1, out_2)
  out$var <- var
  return(out)
}

out_1 <- gen.npr('policy_alignment',1)
out_2 <- gen.npr('policy_certainty',1)
out_3 <- gen.npr('policy_alignment',0)
out_4 <- gen.npr('policy_certainty',0)

out_a <- dplyr::bind_rows(out_1, out_2)
out_a$spec <- 'Incumbent'
out_b <- dplyr::bind_rows(out_3, out_4)
out_b$spec <- 'Challenger'
out <- dplyr::bind_rows(out_a, out_b)
out$var <- as.factor(out$var)
levels(out$var) <- c('Policy alignment', 'Policy certainty')
out$var <- factor(out$var,levels(out$var)[c(1:2)])

out$spec <- as.factor(out$spec)
levels(out$spec) <- c('Challenger', 'Incumbent')
out$spec <- factor(out$spec,levels(out$spec)[c(2,1)])

p <- ggplot(data=out, aes(x=x,y=y,group=type,colour=factor(type)))+
  theme_bw()+facet_grid(cols=vars(var), rows=vars(spec))+
  geom_hline(aes(yintercept=0),colour='black')+
  geom_line(size=1.5)+
  theme(strip.background = element_rect(fill="white"))+
  scale_x_continuous(breaks=seq(-2,2,0.5), limits=c(-1,1))+
  ylim(c(-0.2, 0.8))+ylab('Probability')+xlab(NULL)+
  theme(legend.title=element_blank())+
  scale_color_viridis(discrete=T, end=0.8)

ggsave("Figures/Figure 2.pdf",p,width=5.75,height=4.75)


### Performance measures (Figures A3, A4, A5)  #################################
################################################################################

d <- read_dta("Data/data_voter_candidate.dta")

### Predicted versus observed performance among participating candidates (Figure A3) ###

d$std_cand_ground <- scale(scale(d$cand_vbuy)[,1] + scale(d$cand_campaign_leaf)[,1] + scale(d$cand_campaign_visit)[,1])[,1]
d$std_cand_radio <- scale(d$cand_radio)[,1]

vars <- c("perf_obs","perf_pred")

p <- d %>% group_by(ID_cand, t_invite) %>% dplyr::summarise(across(vars, mean)) %>% 
  mutate(treatment = case_when(t_invite==0~"Control", t_invite==1~"Intensive Invite")) %>% 
  ggplot(aes(x=perf_obs, y=perf_pred))+theme_bw()+geom_point(alpha=0.75)+
  facet_grid(cols=vars(treatment))+geom_smooth(span=1,color="cadetblue")+theme(strip.background=element_rect(fill="white"))+
  xlab("Observed debate performance")+ylab("Predicted debate performance")

ggsave("Figures/Figure A3.pdf",p,width=6,height=3.5)


### Correlation between debate performance and campaigning in control districts (Figure A5) ###

vars <- c("perf_obs","std_cand_ground","std_cand_radio")

p <- d %>% filter(t_invite==0) %>% group_by(ID_cand) %>% dplyr::summarise(across(vars, mean)) %>% 
  dplyr::mutate(across(c(perf_obs,std_cand_ground,std_cand_radio), function(x) scale(x)[,1])) %>% 
  pivot_longer(cols=starts_with("std_")) %>% 
  mutate(v = case_when(name=="std_cand_ground"~"Ground",name=="std_cand_radio"~"Radio")) %>%
  ggplot(aes(x=perf_obs, y=value))+theme_bw()+
  geom_point(alpha=0.75)+geom_smooth(color="cadetblue",span=2)+
  facet_grid(cols=vars(v))+  theme(strip.background=element_rect(fill="white"))+
  xlab("Std. Performance")+ylab("Campaigning outcome")

ggsave("Figures/Figure A5.pdf",p,width=6,height=3.5)


### Correlates of predicted performance measure (Figure A4) ###

d <- fastDummies:: dummy_cols(d, select_columns="party") 
vars_party <- c("party_UP","party_CDC","party_LP","party_ALP","party_ANC","party_Ind.")
vars_ind <- c("incumbent","cand_male","cand_comp_bl", "cand_comp_sure_bl", "pref_match", "cand_issue_sure_bl", 
              vars_party)

vars <- c("perf_pred", vars_party, vars_ind)
d <- d %>% group_by(ID_cand, EDCODE) %>% dplyr::summarise(across(vars, mean))
d$CCODE <- sapply(str_split(d$EDCODE, pattern="D"),first)

v_all <- paste("scale(",vars_ind,")",sep="")
v_all <- paste(v_all, collapse="+")
fmla <- as.formula(paste("perf_pred", "~", v_all, "| 1", sep=""))
out <- feols(fmla, data=d, cluster="EDCODE") %>% coeftable() %>% as.data.frame() %>% filter(rownames(.)!="(Intercept)")
colnames(out)[1:4] <- c("coef","se","t","p")
out$iv <- gsub("scale[(]|[)]","",rownames(out))

map <- fread("Data/question_correspondence.csv")

out <- out %>% left_join(map, by=c("iv"="var"))
out$label <- factor(out$label, levels=unique(out$label)) 
p <- out %>% ggplot(aes(y=fct_rev(label)))+theme_bw()+ylab(NULL)+xlab("Standardized coefficient")+
  geom_vline(aes(xintercept=0), color="cadetblue")+
  geom_point(aes(x=coef))+
  geom_errorbarh(aes(xmin=coef-(1.96*se), xmax=coef+(1.96*se)), height=0)+geom_errorbarh(aes(xmin=coef-(1.645*se), xmax=coef+(1.645*se)), height=0.2)

ggsave("Figures/Figure A4.pdf",p,width=6,height=4)
